This file describes sites, locations, number of lanes, etc.
Use this to determine an interesting site and its upstream and downstream sites.
import xmltodict
import pandas as pd
from pathlib import Path
from dask.distributed import Client
import dask.dataframe as dd
import numpy as np
import holoviews as hv
import hvplot.pandas
# Route pandas' plotting API through holoviews.
pd.options.plotting.backend = 'holoviews'
# Start a dask.distributed client with default arguments (the captured output
# below reports a LocalCluster with 4 workers).
client = Client()
# Bare expression: displays the client summary when run as a notebook cell.
client
Client-e5babe94-4bee-11ec-aedc-94659cb1401c
| Connection method: Cluster object | Cluster type: distributed.LocalCluster |
| Dashboard: http://127.0.0.1:8787/status |
98e6c21d
| Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
| Total threads: 4 | Total memory: 11.90 GiB |
| Status: running | Using processes: True |
Scheduler-79900d1a-27f2-47fb-a3e5-bad8234f98f1
| Comm: tcp://127.0.0.1:50194 | Workers: 4 |
| Dashboard: http://127.0.0.1:8787/status | Total threads: 4 |
| Started: Just now | Total memory: 11.90 GiB |
| Comm: tcp://192.168.4.22:50225 | Total threads: 1 |
| Dashboard: http://192.168.4.22:50227/status | Memory: 2.97 GiB |
| Nanny: tcp://127.0.0.1:50197 | |
| Local directory: C:\projects\nrel-presentation\src\data\dask-worker-space\worker-_ka49h28 | |
| Comm: tcp://192.168.4.22:50222 | Total threads: 1 |
| Dashboard: http://192.168.4.22:50228/status | Memory: 2.97 GiB |
| Nanny: tcp://127.0.0.1:50200 | |
| Local directory: C:\projects\nrel-presentation\src\data\dask-worker-space\worker-2vo4ducw | |
| Comm: tcp://192.168.4.22:50223 | Total threads: 1 |
| Dashboard: http://192.168.4.22:50229/status | Memory: 2.97 GiB |
| Nanny: tcp://127.0.0.1:50198 | |
| Local directory: C:\projects\nrel-presentation\src\data\dask-worker-space\worker-mxz_2m96 | |
| Comm: tcp://192.168.4.22:50224 | Total threads: 1 |
| Dashboard: http://192.168.4.22:50226/status | Memory: 2.97 GiB |
| Nanny: tcp://127.0.0.1:50199 | |
| Local directory: C:\projects\nrel-presentation\src\data\dask-worker-space\worker-axuadpl5 | |
# Locate the FAST detector metadata file under the project root and parse the
# XML text into nested dictionaries.
root_dir = Path('c:/Users/David/OneDrive//projects/portfolio/traffic')
meta_file = root_dir / 'working/FASTmetadata.xml'
with meta_file.open() as f:
    xml_data = f.read()
raw_data = xmltodict.parse(xml_data)
# Collect one map point per detector as (longitude, latitude, detector id).
# Detectors whose coordinates fail the sanity check (longitude below -1.0 and
# latitude above 1.0) keep an empty-tuple placeholder, so positions in D line
# up with positions in the detector list.
all_detectors = raw_data['DetectorInventory']['detector-list']['detector']
D = []
for detector in all_detectors:
    location = detector['detector-location']
    this_lon = float(location['longitude'])
    this_lat = float(location['latitude'])
    if this_lon < -1.0 and this_lat > 1.0:
        D.append((this_lon, this_lat, detector['detector-id']))
    else:
        D.append(())
D[500]
(-115.322273254395, 35.7851676940918, '444_1_100')
import geoviews as gv
import geoviews.feature as gf
from geoviews import opts
from cartopy import crs
import geoviews.tile_sources as gts
from bokeh.models import HoverTool
# Load the bokeh and matplotlib plotting extensions for geoviews.
gv.extension('bokeh', 'matplotlib')
# One point per entry of D, positioned by longitude/latitude and carrying the
# detector id as its value dimension (shown by the hover tool).
points = gv.Points(D, kdims=['longitude', 'latitude'], vdims='detector-id', label='detector-id')
# Unfilled black circle markers with hover enabled and the legend hidden.
points.opts(size=12, marker='o', color='black', fill_color=None, line_width=1, tools=['hover'], show_legend=False)
# Background layers: Esri imagery and OpenStreetMap tiles, both partially transparent.
imagery_tiles = gv.tile_sources.EsriImagery(alpha=0.75)
osm_tiles = gv.tile_sources.OSM().opts(alpha=0.5)
# Overlay the detector points on top of both tile layers.
layout = imagery_tiles * osm_tiles * points
layout.opts(width=1000, height=700, title='Las Vegas Area Highway Speed Detectors')
# Write the map as a PNG under the project's reports/figures directory.
hv.save(layout, root_dir / 'reports/figures/detector_locations.png', fmt='png')
from holoviews import opts
hv.help(opts.Points)
Help on method builder in module holoviews.util: builder(*, active_tools, align, alpha, angle, apply_extents, apply_ranges, aspect, axiswise, backend, bgcolor, border, cformatter, clabel, clim, clim_percentile, clipping_colors, cmap, cnorm, color, color_levels, colorbar, colorbar_opts, colorbar_position, data_aspect, default_span, default_tools, fill_alpha, fill_color, fixed_bounds, fontscale, fontsize, frame_height, frame_width, framewise, global_extent, gridstyle, height, hooks, hover_alpha, hover_color, hover_fill_alpha, hover_fill_color, hover_line_alpha, hover_line_color, infer_projection, invert_axes, invert_xaxis, invert_yaxis, jitter, labelled, legend_cols, legend_muted, legend_offset, legend_opts, legend_position, line_alpha, line_cap, line_color, line_dash, line_join, line_width, lod, logx, logy, logz, margin, marker, max_height, max_width, min_height, min_width, muted_alpha, muted_color, muted_fill_alpha, muted_fill_color, muted_line_alpha, muted_line_color, nonselection_alpha, nonselection_color, nonselection_fill_alpha, nonselection_fill_color, nonselection_line_alpha, nonselection_line_color, normalize, padding, palette, projection, responsive, selected, selection_alpha, selection_color, selection_fill_alpha, selection_fill_color, selection_line_alpha, selection_line_color, shared_axes, shared_datasource, show_bounds, show_frame, show_grid, show_legend, show_title, size, symmetric, title, toolbar, tools, visible, width, xaxis, xformatter, xlabel, xlim, xrotation, xticks, yaxis, yformatter, ylabel, ylim, yrotation, yticks, zlim) method of param.parameterized.ParameterizedMetaclass instance
raw_data['DetectorInventory']['detector-list'].keys()
odict_keys(['detector'])
list_of_detectors = raw_data['DetectorInventory']['detector-list']['detector']
list_of_detectors[100]
OrderedDict([('detector-id', '112_2_44'),
('organization-id', 'NDOT-TTID'),
('station-id', '44'),
('detector-name',
'I-15 NB between I-15 NB Charleston EB Off-Ramp and I-15 NB Charleston WB Off-Ramp'),
('detector-location',
OrderedDict([('latitude', '36.1563301086426'),
('longitude', '-115.159843444824')])),
('route-designator', 'I-15 NB'),
('detector-direction', '1'),
('linear-reference', '41.61'),
('detector-type', '5'),
('measurement-duration', '60'),
('class1-count-length', '8'),
('class2-count-length', '18'),
('class3-count-length', '24'),
('class4-count-length', '40'),
('class5-count-length', '80'),
('class6-count-length', '100'),
('detection-lane',
OrderedDict([('detection-lane-item',
[OrderedDict([('lane-number', '1'),
('detection-lane-status', '1'),
('lane-type', 'Freeway')]),
OrderedDict([('lane-number', '2'),
('detection-lane-status', '1'),
('lane-type', 'Freeway')]),
OrderedDict([('lane-number', '3'),
('detection-lane-status', '1'),
('lane-type', 'Freeway')]),
OrderedDict([('lane-number', '4'),
('detection-lane-status', '1'),
('lane-type', 'Freeway')]),
OrderedDict([('lane-number', '5'),
('detection-lane-status', '1'),
('lane-type', 'Freeway')])])]))])
len(list_of_detectors)
692
# Preview the summary row that will be built for every detector, using one
# sample detector.
d = list_of_detectors[10]
# xmltodict yields a list only when a tag repeats; a detector with a single
# 'detection-lane-item' comes back as one mapping, and len() of that mapping
# would count its keys instead of its lanes. Count such a detector as 1 lane.
lane_items = d['detection-lane']['detection-lane-item']
a = [d['detector-id'],
     d['detector-name'],
     d['route-designator'],
     d['detector-direction'],
     d['linear-reference'],
     len(lane_items) if isinstance(lane_items, list) else 1]
print(a)
columns = ['id', 'name', 'route', 'direction', 'mile', 'numlanes']
['9_1_3', 'I-215 WB between I-15 SB to I-215 WB/EB Ramp and I-215 WB Decatur Off-Ramp', 'I-215 WB', '7', '99.55', 4]
# Column labels for the per-detector summary table.
columns = ['id', 'name', 'route', 'direction', 'mile', 'numlanes']
print(columns)

# Build one summary row per detector: id, name, route, direction code,
# linear reference (mile) and number of lanes.
all_detectors = []
for d in list_of_detectors:
    # xmltodict yields a list only when a tag repeats; a detector with a
    # single 'detection-lane-item' comes back as one mapping, and len() of
    # that mapping would count its keys instead of its lanes.
    lane_items = d['detection-lane']['detection-lane-item']
    numlanes = len(lane_items) if isinstance(lane_items, list) else 1
    a = [d['detector-id'],
         d['detector-name'],
         d['route-designator'],
         d['detector-direction'],
         d['linear-reference'],
         numlanes]
    all_detectors.append(a)
['id', 'name', 'route', 'direction', 'mile', 'numlanes']
# Tabulate the per-detector rows under the column labels defined earlier.
D = pd.DataFrame(all_detectors, columns=columns)
# Export the table (without the row index) to Excel for manual inspection.
# NOTE(review): this path is outside root_dir used earlier -- confirm it is intended.
D.to_excel(Path('c:/projects/nrel-presentation/working/FASTmetadata.xlsx'), index=False)
After looking at the Excel file, these are the sites we'll use:
site to predict: 355_2_135
from upstream to downstream:
355_1_156, 355_2_153, 355_3_155
miles 31.48, 31.84, 32.25
D
| id | name | route | direction | mile | numlanes | |
|---|---|---|---|---|---|---|
| 0 | 2_1_268 | I-215 EB between I-215 EB Decatur Off-Ramp and... | I-215 EB | 3 | 101.41 | 4 |
| 1 | 2_2_267 | I-215 EB between I-215 EB Decatur Off-Ramp and... | I-215 EB | 3 | 100.56 | 4 |
| 2 | 3_2_1 | I-215 EB Decatur On-Ramp between Decatur Road ... | I-215 EB Decatur On-Ramp | 3 | 98.604 | 3 |
| 3 | 4_1_1 | I-215 EB between I-215 EB Decatur On-Ramp and ... | I-215 EB | 3 | 100.31 | 4 |
| 4 | 4_2_4 | I-215 EB between I-215 EB Decatur On-Ramp and ... | I-215 EB | 3 | 100.04 | 4 |
| ... | ... | ... | ... | ... | ... | ... |
| 687 | 585_1_432 | I-15 SB 215 CC On Ramp between 215 CC and I-15 SB | I-15 SB 215 CC On Ramp | 5 | 34.5 | 3 |
| 688 | 586_1_434 | I-15 SB 215 CC Off Ramp between I-15 SB and 21... | I-15 SB 215 CC Off Ramp | 5 | 35.5 | 3 |
| 689 | 587_1_427 | I-15 SB Lamb On Ramp between Lamb and I-15 SB | I-15 SB Lamb On Ramp | 5 | 32.5 | 3 |
| 690 | 588_1_422 | I-15 SB Lamb Off Ramp between I-15 SB and Lamb | I-15 SB Lamb Off Ramp | 5 | 33.5 | 3 |
| 691 | 589_1_410 | I-15 SB Craig Off Ramp between I-15 SB and Cra... | I-15 SB Craig Off Ramp | 5 | 31.5 | 3 |
692 rows × 6 columns
D[D['id'].isin(['358_1_325', '358_2_320', '358_3_319'])]
| id | name | route | direction | mile | numlanes | |
|---|---|---|---|---|---|---|
| 267 | 358_1_325 | I-15 NB between Erie Avenue and Barbara Lane | I-15 NB | 1 | 28.16 | 3 |
| 268 | 358_2_320 | I-15 NB between Erie Avenue and Barbara Lane | I-15 NB | 1 | 28.48 | 3 |
| 269 | 358_3_319 | I-15 NB between Erie Avenue and Barbara Lane | I-15 NB | 1 | 28.85 | 3 |
# Select the detectors on a single route, convert the mile marker and lane
# count to numeric types, and order the rows by mile marker.
this_route = 'I-15 NB'
# astype returns a new frame, so the subset is detached from D before sorting.
D_I15NB = (
    D[D['route'] == this_route]
    .astype({'mile': 'float', 'numlanes': 'int'})
    .sort_values(by='mile')
)
D_I15NB
| id | name | route | direction | mile | numlanes | |
|---|---|---|---|---|---|---|
| 520 | 451_1_99 | I-15 NB between 11.71 Miles south of Sloan Roa... | I-15 NB | 1 | 0.00 | 2 |
| 518 | 450_1_99 | I-15 NB between 10.71 Miles south of Sloan Roa... | I-15 NB | 1 | 0.40 | 2 |
| 519 | 450_2_359 | I-15 NB between 10.71 Miles south of Sloan Roa... | I-15 NB | 1 | 0.90 | 3 |
| 515 | 449_1_358 | I-15 NB between 9.71 Miles south of Sloan Road... | I-15 NB | 1 | 1.41 | 3 |
| 516 | 449_2_357 | I-15 NB between 9.71 Miles south of Sloan Road... | I-15 NB | 1 | 2.01 | 3 |
| ... | ... | ... | ... | ... | ... | ... |
| 388 | 402_2_439 | I-15 NB between Clark County 215 and Centennia... | I-15 NB | 1 | 52.57 | 2 |
| 389 | 402_3_443 | I-15 NB between Clark County 215 and Centennia... | I-15 NB | 1 | 52.87 | 3 |
| 390 | 402_4_444 | I-15 NB between Clark County 215 and Centennia... | I-15 NB | 1 | 53.39 | 2 |
| 391 | 403_1_447 | I-15 NB between Centennial Parkway and Speedwa... | I-15 NB | 1 | 53.94 | 2 |
| 392 | 403_3_448 | I-15 NB between Centennial Parkway and Speedwa... | I-15 NB | 1 | 54.80 | 2 |
114 rows × 6 columns
# Scatter the I-15 NB detectors by mile marker, with all columns available on hover.
# NOTE(review): y is the direction code, which is '1' for every I-15 NB row shown
# in the table output, so the points presumably fall on a single horizontal line.
scatter = D_I15NB.hvplot.scatter(x='mile', y='direction', hover_cols='all')
# Marker size grows with the lane count (numlanes ** 1.5).
scatter.opts(color='blue', size=hv.dim('numlanes')**1.5, width=1200, alpha=0.5, line_color='black')
# Bare expression: displays the plot when run as a notebook cell.
scatter
For each parquet file, read all data from I-15 NB stations
D.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 692 entries, 0 to 691 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 692 non-null object 1 name 692 non-null object 2 route 692 non-null object 3 direction 692 non-null object 4 mile 692 non-null object 5 numlanes 692 non-null int64 dtypes: int64(1), object(5) memory usage: 32.6+ KB
# change dtypes
D = D.astype({'mile': float})
D.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 692 entries, 0 to 691 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 692 non-null object 1 name 692 non-null object 2 route 692 non-null object 3 direction 692 non-null object 4 mile 692 non-null float64 5 numlanes 692 non-null int64 dtypes: float64(1), int64(1), object(4) memory usage: 32.6+ KB
# Stations of interest: I-15 NB detectors strictly between mile 2.0 and mile 30.0.
on_route = D['route'] == 'I-15 NB'
in_stretch = (D['mile'] > 2.0) & (D['mile'] < 30.0)
idx = on_route & in_stretch
all_stations = D.loc[idx, 'id'].values

# Read only the needed columns from every parquet file in the interim data
# directory, keep the rows for the selected stations, and materialise the
# result as a pandas DataFrame.
pqdir = Path('c:/projects/nrel-presentation/data/interim/')
wanted_columns = ['detector_id', 'timestamp', 'lane_vehicle_speed',
                  'lane_vehicle_count', 'occupancy']
df = dd.read_parquet(pqdir / '*.pq', columns=wanted_columns)
is_selected = df['detector_id'].isin(all_stations)
df = df[is_selected].compute()
df
| detector_id | timestamp | lane_vehicle_speed | lane_vehicle_count | occupancy | |
|---|---|---|---|---|---|
| 544 | 357_3_310 | 2021-10-26 20:01:27 | 86 | 3 | 1 |
| 545 | 357_3_310 | 2021-10-26 20:01:27 | 73 | 5 | 6 |
| 546 | 357_3_310 | 2021-10-26 20:01:27 | 74 | 6 | 4 |
| 1210 | 437_3_327 | 2021-10-26 20:01:27 | 149 | 0 | 0 |
| 1211 | 437_3_327 | 2021-10-26 20:01:27 | 149 | 0 | 0 |
| ... | ... | ... | ... | ... | ... |
| 1267 | 448_1_355 | 2021-11-08 10:54:16 | 70 | 3 | 1 |
| 1268 | 448_1_355 | 2021-11-08 10:54:16 | 73 | 2 | 1 |
| 1269 | 448_3_353 | 2021-11-08 10:54:16 | 77 | 6 | 1 |
| 1270 | 448_3_353 | 2021-11-08 10:54:16 | 73 | 7 | 2 |
| 1271 | 448_3_353 | 2021-11-08 10:54:16 | 75 | 3 | 2 |
1054158 rows × 5 columns
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1054158 entries, 544 to 1271 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 detector_id 1054158 non-null object 1 timestamp 1054158 non-null datetime64[ns] 2 lane_vehicle_speed 1054158 non-null int64 3 lane_vehicle_count 1054158 non-null int64 4 occupancy 1054158 non-null int64 dtypes: datetime64[ns](1), int64(3), object(1) memory usage: 48.3+ MB
all_stations
array(['357_1_312', '357_2_311', '357_3_310', '358_1_325', '358_2_320',
'358_3_319', '359_1_325', '437_1_101', '437_3_327', '438_1_331',
'438_2_330', '438_3_329', '439_1_334', '439_2_333', '439_3_332',
'440_1_335', '441_1_338', '441_2_337', '441_3_336', '442_1_341',
'442_2_340', '442_3_339', '443_1_344', '443_2_343', '443_3_342',
'444_1_100', '444_2_100', '444_3_345', '445_1_348', '445_2_347',
'445_3_346', '446_1_350', '446_2_78', '446_3_349', '447_1_352',
'447_2_351', '447_3_77', '448_1_355', '448_2_354', '448_3_353',
'449_2_357', '449_3_356'], dtype=object)
# Index the readings by timestamp so later plots use time on the x axis.
df.set_index('timestamp', inplace=True)
# Blank out speeds of 120 and above (the test is `> 119` on integer readings).
# NOTE(review): presumably these are invalid/sentinel values, e.g. the 149
# readings reported alongside zero vehicle counts -- confirm with the data source.
idx = df['lane_vehicle_speed'] > 119
# Series.mask returns a float column with NaN at the flagged rows; this avoids
# writing NaN in place into an int64 column, which newer pandas deprecates.
df['lane_vehicle_speed'] = df['lane_vehicle_speed'].mask(idx)
pd.options.plotting.backend = 'holoviews'
import hvplot.pandas
# Scatter the lane speeds over time for a slice of four selected stations
# (positions 10-13 of all_stations), one colour per detector.
this_df = df[df['detector_id'].isin(all_stations[10:14])]
this_df[['detector_id', 'lane_vehicle_speed']].hvplot.scatter(by='detector_id',
title='Lane Vehicle Speed',
ylabel='Speed (mph)',
xlabel='Date',
alpha=0.5,
size=10,
width=800,
height=500)
# Line plots of lane speed over time for three hand-picked detectors, drawn as
# separate subplots stacked in a single column with a shared 0-120 y range.
# NOTE(review): size=10 is passed to a line plot -- confirm it has any effect.
this_df2 = df[df['detector_id'].isin(['447_2_351', '446_1_350', '446_3_349'])]
this_df2[['detector_id', 'lane_vehicle_speed']].hvplot.line(by='detector_id',
#title='Lane Vehicle Speed',
ylabel='Speed (mph)',
xlabel='Date',
ylim=(0, 120),
size=10,
width=900,
height=250,
subplots=True).cols(1)
df.groupby('detector_id').count()
| lane_vehicle_speed | lane_vehicle_count | occupancy | |
|---|---|---|---|
| detector_id | |||
| 357_3_310 | 43398 | 45654 | 45654 |
| 358_1_325 | 57499 | 61220 | 61220 |
| 437_3_327 | 0 | 49251 | 49251 |
| 438_1_331 | 50667 | 50667 | 50667 |
| 438_2_330 | 0 | 25302 | 25302 |
| 438_3_329 | 46048 | 49023 | 49023 |
| 439_1_334 | 47746 | 50529 | 50529 |
| 439_2_333 | 27454 | 27801 | 27801 |
| 439_3_332 | 47673 | 50680 | 50680 |
| 440_1_335 | 0 | 9 | 9 |
| 441_1_338 | 49920 | 49920 | 49920 |
| 441_3_336 | 39699 | 50622 | 50622 |
| 442_2_340 | 46966 | 49803 | 49803 |
| 443_1_344 | 49923 | 49923 | 49923 |
| 443_3_342 | 42033 | 49002 | 49002 |
| 445_1_348 | 0 | 25455 | 25455 |
| 445_2_347 | 46677 | 49896 | 49896 |
| 445_3_346 | 49893 | 49893 | 49893 |
| 446_1_350 | 46844 | 49917 | 49917 |
| 446_3_349 | 46829 | 49923 | 49923 |
| 447_2_351 | 46908 | 49926 | 49926 |
| 447_3_77 | 0 | 17106 | 17106 |
| 448_1_355 | 45365 | 48537 | 48537 |
| 448_2_354 | 0 | 6045 | 6045 |
| 448_3_353 | 44786 | 48054 | 48054 |